
# Re-cluster putative mural cell populations from pdgfrb:egfp, 5 dpf
# scRNA-seq dataset originally published in Shih et al., Development (2021), PMID: 34751773

# For re-clustering: pull out the barcodes of cells in the identified clusters of the pdgfrb:egfp analysis, then go back to the
# original count data (after filtering, but before normalization, finding variable features, etc.) and pull those cells out.

library(Seurat)
library(ggplot2)

# Load the saved workspace that provides the input objects used below
# (allCellsForClusterReID and allCells).
reclusterInputFile <- "/Users/nathan/Dropbox (UMass Medical School)/LabProjects/Pericyte/1_SCanalysis/2_scRNAseq/2021_5_0_pdgfrbEgfpPos/ReclusterPdgfrbHiCells/AllCellsForReClustering.RData"
load(file = reclusterInputFile)

# Switch the working directory (for testing purposes); every output file
# below is written with a relative name and therefore ends up in this folder.
setwd(dir = "oneLastScriptTest/")


# data objects in "AllCellsForReClustering.RData":
# allCellsForClusterReID = all pdgfrb:egfp 5 dpf cells after clustering as in Shih et al.
#
# allCells = all pdgfrb:egfp 5 dpf cells from duplicate libraries, after filtering
#   for MT and counts as in Shih et al., but PRIOR
#   to normalization, scaling, finding variable features, etc.
#
##--> both of these can be generated from the Cell Ranger output files (GSE176129)
##    using the previously published Seurat commands


# Pick out the clusters annotated as putative mural cells (one pericyte
# cluster plus six smc clusters) and record the barcodes of their cells.
# N.B.: smooth muscle cells (smc) will include gut, pharyngeal, as well as vascular smc
muralClusterLabels <- c("39-pericyte", "14-smc", "52-smc", "17-smc",
                        "51-smc", "53-smc", "6-smc")
muralCellClusters <- subset(x = allCellsForClusterReID, idents = muralClusterLabels)
muralCellBarcodes <- WhichCells(object = muralCellClusters)

# Show which cells on the full-dataset UMAP are being used for subclustering.
# The plot is passed to print() explicitly: ggplot-based plots are only drawn
# when printed, and top-level auto-printing does not happen when the script is
# run through source(), which would otherwise leave an empty PDF.
pdf(file = "1_DimPlotHighLightMurals.pdf", height = 8, width = 8)
print(
  DimPlot(allCellsForClusterReID, reduction = "umap",
          cells.highlight = muralCellBarcodes,
          label = TRUE, repel = TRUE) + NoLegend()
)
dev.off()

# Use the barcodes of the cells of interest to extract those same cells from
# the filtered, not-yet-normalized count object; these are the cells that get
# re-clustered.
muralCellsOnly <- subset(x = allCells, cells = muralCellBarcodes)

# Inspect the subset; output recorded from the original run:
muralCellsOnly
#An object of class Seurat 
#36351 features across 1290 samples within 1 assay 
#Active assay: RNA (36351 features, 0 variable features)

# Number of cells contributed by each replicate library:
table(muralCellsOnly$orig.ident)
#pdgfrb.1 pdgfrb.2 
#561      729

# Separate the cells by library of origin (orig.ident), then for each library
# normalize the counts and pick the 2000 most variable features ("vst").
putMurCell.list <- SplitObject(muralCellsOnly, split.by = "orig.ident")
putMurCell.list <- lapply(X = putMurCell.list, FUN = function(lib) {
  FindVariableFeatures(NormalizeData(lib),
                       selection.method = "vst", nfeatures = 2000)
})

# Choose the features that are repeatedly variable across the libraries; these
# are used for integration. Each library is then scaled and run through PCA
# restricted to those features.
features <- SelectIntegrationFeatures(object.list = putMurCell.list)
putMurCell.list <- lapply(X = putMurCell.list, FUN = function(lib) {
  scaled <- ScaleData(lib, features = features, verbose = FALSE)
  RunPCA(scaled, features = features, verbose = FALSE)
})

# identify anchor points between datasets (reciprocal PCA, k.anchor = 10)
# from https://satijalab.org/seurat/articles/integration_rpca.html
putMurCell.anchors <- FindIntegrationAnchors(object.list = putMurCell.list,
                                             anchor.features = features,
                                             reduction = "rpca",
                                             k.anchor = 10)

# integrate the libraries using those anchors
putMurCell.combined <- IntegrateData(anchorset = putMurCell.anchors)

# scale the combined object and compute 25 principal components
# (the stray trailing comma that passed an empty argument to ScaleData() has
# been removed; the call is otherwise unchanged)
putMurCell.combined <- ScaleData(putMurCell.combined)
putMurCell.combined <- RunPCA(putMurCell.combined, npcs = 25)

# PC17, res 1.3: UMAP and neighbour graph on principal components 1-17,
# followed by clustering at resolution 1.3 with "integrated" as default assay
dimsPC17 <- 1:17
putMurCell.combinedPC17 <- RunUMAP(object = putMurCell.combined,
                                   reduction = "pca", dims = dimsPC17)
putMurCell.combinedPC17 <- FindNeighbors(object = putMurCell.combinedPC17,
                                         reduction = "pca", dims = dimsPC17)
DefaultAssay(putMurCell.combinedPC17) <- "integrated"
putMurCell.combinedPC17res1.3 <- FindClusters(object = putMurCell.combinedPC17,
                                              resolution = 1.3)

# see what the clusters look like
# (switch to the RNA assay before building the plots)
DefaultAssay(putMurCell.combinedPC17res1.3) <- "RNA"

# p1: UMAP with cluster labels; p2: stacked violin plot of marker genes
p1 <- DimPlot(putMurCell.combinedPC17res1.3, reduction = "umap", label = TRUE) + NoLegend()
p2 <- VlnPlot(putMurCell.combinedPC17res1.3,
              features = c("pdgfrb","notch3","ndufa4l2a","rasl12","kcne4","rgs5a",
                           "ednraa", "kcnj8","foxc1b","lamb1b","desmb","acta2","myocd",
                           "cnn1b","tagln","pdgfra","col1a1a","lum","dcn"),
              stack = TRUE, flip = TRUE) + NoLegend()

# Write both plots side by side. The composed plot is passed to print()
# explicitly so the PDF is also filled when the script is run via source(),
# where top-level results are not auto-printed.
# NOTE(review): `|` on plots is presumably the patchwork operator, made
# available through Seurat's dependencies — confirm.
pdf(file = "2_DimAndVlnPlotsPC17Res1.3.pdf", height = 6, width = 12)
print(p1 | p2)
dev.off()

# split out by replicate library (orig.ident) to check
# print() is explicit so the plot is also drawn when the script is run via
# source(), where top-level results are not auto-printed.
pdf(file = "2b_DimPlotsPC17Res1.3splitByRep.pdf", height = 6, width = 12)
print(
  DimPlot(putMurCell.combinedPC17res1.3, reduction = "umap",
          label = TRUE, repel = TRUE,
          split.by = "orig.ident") + NoLegend()
)
dev.off()

# Known pericyte, smc, and fibroblast markers: write their average expression
# values (RNA assay) to a CSV. The table is used to help assign putative cell
# identities; sort in Excel.
avgExpMarkers <- c("pdgfrb", "notch3", "ndufa4l2a", "rasl12", "kcne4", "rgs5a",
                   "ednraa", "kcnj8", "foxc1b", "lamb1b", "desmb", "acta2",
                   "myocd", "cnn1b", "tagln", "pdgfra", "col1a1a", "lum", "dcn")
write.csv(
  AverageExpression(putMurCell.combinedPC17res1.3,
                    assays = "RNA",
                    features = avgExpMarkers),
  file = "3_avgExpSMCperiMarkers.csv"
)

# Compute markers for every cluster (logfc.threshold = 0.5) and save them;
# these are used to get IDs for the remaining clusters.
allMarkers <- FindAllMarkers(object = putMurCell.combinedPC17res1.3,
                             logfc.threshold = 0.5)
write.csv(x = allMarkers, file = "4_allMarkers.csv")

# change cluster order
# newClstOrder lists the cluster numbers in the order they should appear in
# plots; it is hard-coded for the clustering produced above.
newClstOrder = c(12,5,15,1,6,14,16,9,13,4,
                 17,2,11,0,10,3,7,8)

# factor() turns every value that is not among `levels` into NA, so a
# clustering that does not yield exactly these cluster numbers would silently
# lose cell identities. Fail loudly instead.
if (!setequal(as.character(newClstOrder), levels(putMurCell.combinedPC17res1.3))) {
  stop("newClstOrder does not match the current cluster levels: ",
       paste(levels(putMurCell.combinedPC17res1.3), collapse = ", "),
       call. = FALSE)
}
Idents(putMurCell.combinedPC17res1.3) <- factor(Idents(putMurCell.combinedPC17res1.3),
                                                levels = newClstOrder)


# p3: stacked violin plot of marker genes, drawn with the current identity
# order of the object
p3 <- VlnPlot(putMurCell.combinedPC17res1.3,
              features = c("pdgfrb","notch3","ndufa4l2a","rasl12",
                           "kcne4","rgs5a", "ednraa", "kcnj8",
                           "foxc1b","lamb1b","desmb","myocd",
                           "cnn1b","tagln","pdgfra","dcn",
                           "elnb", "nkx3.2","tbx1","dlx4a"),
              stack = TRUE, flip = TRUE) + NoLegend()

# UMAP (p1, built earlier in the script) next to the violin plot. print() is
# explicit so the PDF is also filled when the script is run via source(),
# where top-level results are not auto-printed.
pdf(file = "2c_DimAndVlnPlotsPC17Res1.3orderClst.pdf", height = 6, width = 12)
print(p1 | p3)
dev.off()

# Name the clusters: the new names are matched position by position to the
# object's current identity levels, so their order here must follow that
# level order.
clusterNames <- setNames(
  c("peri.12", "peri.5", "peri-like.15",
    "fib.1", "fib.6", "smc-like.14",
    "smc.16", "smc.9", "smc.13", "smc.4",
    "smc.17", "smc.2", "smc.11", "smc.0",
    "bulbArt.10", "phArch.3", "phArch.7", "phArch.8"),
  levels(putMurCell.combinedPC17res1.3)
)
putMurCell.combinedPC17res1.3 <- RenameIdents(putMurCell.combinedPC17res1.3, clusterNames)

# Vln plot with named clusters; signature genes
# print() is explicit so the plot is also drawn when the script is run via
# source(), where top-level results are not auto-printed.
pdf(file = "2e_VlnPlotsPC17Res1.3orderClstNamed.pdf", height = 6, width = 6)
print(
  VlnPlot(putMurCell.combinedPC17res1.3,
          features = c("pdgfrb","notch3","ndufa4l2a","rasl12",
                       "kcne4","rgs5a", "ednraa", "kcnj8",
                       "foxc1b","lamb1b","desmb","myocd",
                       "cnn1b","tagln","pdgfra","dcn",
                       "elnb", "nkx3.2","tbx1","dlx4a"),
          stack = TRUE, flip = TRUE) + NoLegend()
)
dev.off()

# plots for main Figure: UMAP labelled with the current cluster identities
# print() is explicit so the plot is also drawn when the script is run via
# source(), where top-level results are not auto-printed.
pdf(file = "5_DimPlotWithClstNames.pdf", height = 6, width = 6)
print(
  DimPlot(putMurCell.combinedPC17res1.3, reduction = "umap", label = TRUE) + NoLegend()
)
dev.off()

# Stacked violin plot for the main figure: cluster-ID genes followed by
# lymphatic-related genes (per the output file name).
# print() is explicit so the plot is also drawn when the script is run via
# source(), where top-level results are not auto-printed.
# NOTE(review): "pdgfrb" is listed twice (first entry of each group of eight)
# — confirm the repetition is intended.
pdf(file = "5b_VlnPlotsWithClstIDandLymphaticGenes.pdf", height = 6, width = 6)
print(
  VlnPlot(putMurCell.combinedPC17res1.3,
          features = c("pdgfrb","ndufa4l2a","myocd","pdgfra",
                       "elnb","nkx3.2","tbx1","dlx4a",
                       "pdgfrb","vegfc","ccbe1","adamts3",
                       "adamts14","svep1","cxcl12a","cxcl12b"),
          stack = TRUE, flip = TRUE) + NoLegend()
)
dev.off()

# check Vegfc and Cxcl12 receptor expression
# previously identified fibroblast markers from Wang et al. and Rajan et al.
# print() is explicit so the plot is also drawn when the script is run via
# source(), where top-level results are not auto-printed.
pdf(file = "6_Receptors_WangAndRajanMarkers.pdf", height = 3.5, width = 8)
print(
  VlnPlot(putMurCell.combinedPC17res1.3,
          features = c("kdr","kdrl","flt4",
                       "ackr3a","ackr3b",
                       "cxcr4a","cxcr4b",
                       "pdgfrb","nkx3-1","en1a","en1b"),
          stack = TRUE, flip = TRUE) + NoLegend()
)
dev.off()






